*----------------------------------------------------------------------------------------------------------------------*
*This program test pred1 on a reconstructed dataset from the first month of export to account for calendar year effects
	* A - Construct trade dataset
	* B - Compute residuals 
	* C - Build shocks and sigmas: HS6-level, with or without jkt in prices 
	* D - Get macro variables and polish 
	* E - Table A.18
	* F - Table A.24

*This version: December 2016
*----------------------------------------------------------------------------------------------------------------------*

*do "$Source\export_database_reconstr_fe.do"

*************************************
* 	A - Construct trade dataset 	*
*************************************

use "$Source\export2012", clear
gen hs6 = int(product/100)
collapse (sum) value quantity, by(siren country hs6 year month)
replace quantity=. if quantity==0
save temp, replace
bys siren country hs6: egen min_y = min(year)
keep if year==min_y
collapse (min) mo = month, by(siren country hs6 min_y)
replace mo = 1 if min_y<1996
keep siren country hs6 mo
sort siren country hs6
merge 1:m siren country hs6 using temp
drop _m
replace year = year-1 if month<mo
drop if year<1994
collapse (sum) value quantity, by(siren country hs6 year)
sort hs6
save "$Source\export_q_reconstr", replace
erase temp.dta

*drop hs6 that vary over hs92/96/02 classifications
rename hs1992 hs6
sort hs6
merge 1:m hs6 using "$Source\export_q", keep(2 3)
replace hs6 = hs2002 if year<1996 & hs6!=hs2002
drop hs1996 hs2002 _m
sort hs6
save "$Source\export_q", replace
use "$Source\conversion_hs929602_oneone", clear
rename hs1996 hs6
sort hs6
merge 1:m hs6 using "$Source\export_q", keep(2 3)
replace hs6 = hs2002 if year>1995 & year<2002 & hs6!=hs2002
drop hs1992 hs2002 _m
sort hs6
save "$Source\export_q", replace
use "$Source\conversion_hs929602_oneone", clear
rename hs2002 hs6
sort hs6
merge 1:m hs6 using "$Source\export_q_reconstr", keep(3)
drop _m
sort siren country hs6 year
save "$Source\export_q_reconstr", replace

** 	A2 - Start with trade data   **
use "$Source\export_q_reconstr", clear

gen prod = hs6
collapse (sum) value quantity, by(siren country prod year) 

drop if year<1994 | year>2005 /*quantity data not required from 2006 onwards*/
drop if year==2005 /* ANNEE incomplete en 2005 */

egen jkt = group(country prod year)
egen ijk = group(siren country prod)
egen ikt = group(siren prod year)

duplicates drop ijk year, force

tsset ijk year
sort  ijk year

g ln_export  = ln(value)
g dln_export = d.ln_export
g ln_uv  	 = log(value/quantity)
g ln_qty 	 = log(quantity)

label var year    		"Year"
label var siren   		"Firm id"
label var country 		"Destination market"
label var prod			"HS6 product"
label var value 		"Export value"
label var ln_export		"log export value"
label var dln_export 	"Delta log export value"
label var ln_uv	 		"log(unit value)"
label var ln_qty		"log(quantity)"
*
save "$Output\export_brv_reconstr", replace

** 	A3 - Identify various types of flows  **

/* Drop all firms under threshold within EU at any point in time (all obs, including extra-EU) */ 
/* These are firms filling simplified declarations in the EU */
/* We can't have an exhaustive idea of their dynamics in all markets */

insheet using "$Source\BDF-fusion-1994-2010-sans-zero.txt", clear delim(;)
rename v1 siren
rename v2 year
keep if year<2006
keep siren 
duplicates drop siren, force
sort siren 
merge 1:m siren using "$Output\export_brv_reconstr", keep(2 3)
g simplified    = (_m == 3)
g no_simplified = (_m == 2)
label var simplified 	"firm fills simplified declarations"
label var no_simplified "firm doesn't fill simplified declarations"
drop _m

/* EU flows */
g eu15 = (country=="DE" | country=="IT" | country=="AT" | country=="BE" | country=="FI" | country=="GB" | country=="GR" | country=="IE" | country=="NL" | country=="PT" | country=="SE" | country=="ES" | country=="DK" | country=="LU" )
g eu25 = (country=="DE" | country=="IT" | country=="AT" | country=="BE" | country=="FI" | country=="GB" | country=="GR" | country=="IE" | country=="NL" | country=="PT" | country=="SE" | country=="ES" | country=="DK" | country=="LU" | ((country == "HU" | country == "PL" | country =="CZ" | country =="SL" | country =="SK" | country =="LT" | country =="LV" | country =="EE" | country =="MT" | country =="CY") & year > 2003))
label var eu15 "flow to EU15"
label var eu25 "flow to EU25"

/* Flows within EU smaller than 1000 EUR */
g small_flow = (value<1000)
label var small_flow "flow<1000 euros"

/* Firms always below threshold overall but declaring anyway */
g eu25flow = value if eu25 == 1
bys siren year: egen sum_euflow = sum(eu25flow)
bys siren year: egen max_eu25 = max(eu25)
replace sum_euflow = . if max_eu25 == 0
sum sum_euflow, d
g cutoff  = . 
replace cutoff  = 38100  if year < 2001
replace cutoff  = 99100  if year == 2001
replace cutoff  = 100000 if year > 2001
g below_cut = 1 if sum_euflow < cutoff
bys siren: egen max_below_cut = max(below_cut)
drop cutoff sum_euflow max_eu25 below_cut eu25flow
label var max_below_cut "firm always below cutoff but declaring"

sort siren country prod year
compress
sort siren country prod year
save "$Output\export_brv_reconstr", replace

/* Create dataset */
use "$Output\export_brv_reconstr", clear
* drop simplified (firm doesn't fill simplified declarations)
drop if simplified == 1 
* drop also small flows (lower than 1000 euros, for consistency between intra and extra EU flows 
drop if small_flow == 1 
sort siren country prod year
save "$Output\export_brv_reconstr", replace


** 	A4 - Construct experience for each dataset	 **

cd "$Output"
foreach dataset in export_brv_reconstr {
	use `dataset', clear

	collapse (sum) value, by(siren country prod year)
	bys siren country prod: egen min = min(year)
	gen age_ele   = year-min+1
	gen entry_ele = min 

	* reset age=0 if exit 1 year *
	sort siren country prod year
	egen id=group(siren country prod)
	tsset id year
	gen min1  	 = min
	replace min1 = year if value!=. & l1.value==. & year>1995
	replace min1 = l1.min1 if value!=. & l1.value!=.
	gen age_ele1 = year-min1+1
	gen entry_ele1 = min1
	* reset age=0 if exit 2 years *
	sort id year
	replace min  = year if value!=. & l1.value==. & l2.value==. & year>1996
	replace min  = l1.min if value!=. & l1.value!=.
	replace min  = l2.min if value!=. & l1.value==. & l2.value!=.
	replace min  = l1.min if value!=. & l1.value!=.
	sort id min year
	by id min: gen age_ele2 = _n
	gen entry_ele2 = min

	*age as count of years of export
	sort id year
	bys id: gen age_ele3 = _n
	gen entry_ele3 = entry_ele

	label var entry_ele "Year of first entry by dest*prod"
	label var entry_ele1 "Year of entry / reset if exit 1 year"
	label var entry_ele2 "Year of entry / reset if exit 2 years"
	label var entry_ele3 "Year of first entry by dest*prod"
	label var age_ele   "Experience by dest*prod: years since first entry"
	label var age_ele1 	"Experience by dest*prod / reset if exit 1 year"
	label var age_ele2 	"Experience by dest*prod / reset if exit 2 years"
	label var age_ele3  "Experience by dest*prod: count of years of export"

	keep siren country prod year entry_* age_*
	sort siren country prod year
	save experience_ele_reconstr, replace

	use `dataset', clear
	/* merge with experience */
	sort siren country prod year
	merge 1:1 siren country prod year using experience_ele_reconstr, keep(3)
	di "I (still) don't care"
	drop _merge
	
	/* create max age */
	bys siren country prod: 			egen age_ele_max	= max(age_ele)
	bys siren country prod entry_ele1: 	egen age_ele1_max 	= max(age_ele1)
	bys siren country prod entry_ele2: 	egen age_ele2_max 	= max(age_ele2)
	bys siren country prod: 			egen age_ele3_max	= max(age_ele3)

	*define last year before exit
	sort ijk year
	bys ijk: g exit_f1 	= (year[_n+1]!=year[_n]+1)
	replace exit_f1  	= . if year == 2005

	* lag value and quantity
	sort ijk year
	g value_l1       = l.value
	replace value_l1 = 0 if value_l1 == . & age_ele1 == 1
	g quantity_l1 		= l.quantity
	replace quantity_l1 = 0 if quantity_l1 == . & age_ele1 == 1

	label var age_ele_max	"max age_ele"
	label var age_ele1_max	"max age_ele1"
	label var age_ele2_max	"max age_ele2"
	label var age_ele3_max	"max age_ele3"
	label var value_l1		"Export value(t-1)"
	label var quantity_l1	"quantity(t-1)"
	label var exit_f1 		"Dummy for exit in t+1"
	label var siren			"firm id"

	compress
	drop if year<1996 /*never use 1994 & 1995 as looking at growth rates*/
	save `dataset', replace
	
	}

cd "$base"


*************************
* B - Compute residuals *
*************************
*
use "$Output\export_brv_reconstr", clear
keep siren year country prod ln_export ln_qty ln_uv jkt ikt ijk 
keep if ln_uv != . & ln_qty != . & ln_export != . /* singletons are dropped directly by reghdfe */

/* quantities */
reghdfe ln_qty, absorb(jkt ikt) residuals(res_fe_qty)

/* prices, without jkt */
areg ln_uv, a(ikt) 
predict res_fe_uv_nojkt, res

label var res_fe_qty 		"Quantities residuals, FE ikt jkt"
label var res_fe_uv_nojkt	"Prices residuals, FE ikt"
*
drop if res_fe_qty==.
*
save "$Output\temp_res_fe", replace


*************************************************************************
* C - Build shocks and sigmas: HS6-level, without jkt in prices *
*************************************************************************

use "$Output\temp_res_fe", clear
keep siren country prod year res_* 
sort siren country prod year 
merge 1:1 siren country prod year using "$Output\export_brv_reconstr", keep(3)
drop _m
destring prod, replace
compress
sort siren country prod year 
save $Output\use_pred3_hs6, replace
*
** compute shock from reg of res_q on res_uv: BY HS6 **
*keep only ijk with at least 2 observations because ijk FE (not necessarily consecutive as was done previously)
tsset ijk year
egen count_ijk = count(res_fe_uv_nojkt), by(ijk)
keep if count_ijk > 1	
* keep only products with at least 10 observations
egen nbr = count(res_fe_uv_nojkt), by(prod)
drop if nbr<10
drop nbr 
keep year siren country prod ijk res_* 

gen shock_nojkt      		= .
gen shock_nojkt_nosign      = .

gen sigma_nojkt      		= .
gen sigma_sign_nojkt 		= .

egen s = group(prod)
summarize s, d
local s_max = r(max)
save temp, replace

forvalues s=1(1)`s_max' {
	use temp, clear
	keep if s==`s'
	di `s'
	/*without jkt*/
	qui areg res_fe_uv_nojkt res_fe_qty , r a(ijk)
	qui predict res if e(sample), dresiduals
	qui replace sigma_nojkt 	   		= -1 /_b[res_fe_qty] if e(sample)
	qui gen     t_stat_nojkt    	  	= abs(_b[res_fe_qty]/_se[res_fe_qty]) if e(sample)
	qui replace sigma_sign_nojkt   		= sigma_nojkt if e(sample)  & t_stat_nojkt>1.96 & t_stat_nojkt != .
	qui replace shock_nojkt 	   		= res if e(sample) 			& t_stat_nojkt>1.96 & t_stat_nojkt != .
	qui replace shock_nojkt_nosign   	= res if e(sample)
	qui drop res t_stat_nojkt 
	
	if `s'!=1 {
		append using $Output\shocks_fe_reconstr
		}
	save $Output\shocks_fe_reconstr, replace
}


* trim
foreach var in sigma_sign_nojkt {
	egen trim99 = pctile(`var'), p(99)  
	egen trim01 = pctile(`var'), p(1)  
	gen `var'_trim = `var' if `var'>trim01 & `var'<trim99
	drop trim*
	}
*
* keep shocks for meaningfull sigmas, i.e.>=1 
gen shock_nojkt_trim 			= shock_nojkt 		if sigma_sign_nojkt >= 1 & sigma_sign_nojkt !=.
*
drop s 
compress
sort year siren country prod 
cd "$base"

save $Output\shocks_fe_reconstr, replace
* 
erase temp.dta
*
drop res*
replace year = year+1
sort year siren country prod 
merge 1:1 year siren country prod using $Output\use_pred3_hs6, keep(2 3)
drop _merge
* compute delta prior from res_q
tsset ijk year 
g dprior        = res_fe_qty - l.res_fe_qty
label var dprior	"Delta Prior"
sort year siren country prod 
save "$Output\temp_res_fe", replace


**************************************
* D - Get macro variables and polish *
**************************************
*
use "$Output\temp_res_fe", clear
/* country codes */
rename country iso2
sort iso2
merge m:1 iso2 using "$Source\country_iso", keep(3)
drop _m country 
rename iso2 country
/* time to ship */
sort iso3
merge m:1 iso3 using "$Source\distsea_2012", keep(3)
drop _m 
/* distance */
sort iso3
merge m:1 iso3 using "$Source\dist_cepii_fr", keep(3)
drop _m 
/* Polishing*/
drop distcap distw distwces smctry col45 curcol comcol colony comlang_ethno comlang_off contig
drop sigma_nojkt 
drop age_ele_max age_ele
*
label var shock_nojkt 				"Shock(t-1), no jkt in p"
label var shock_nojkt_trim 			"Shock(t-1), no jkt in p, trimmed"
label var shock_nojkt_nosign		"Shock(t-1), no jkt in p, with insign. beta"
label var sigma_sign_nojkt_trim		"Sigma, no jkt in p, sign. only, trimmed"
*
tsset ijk year 
* compute shock = sigma*shock - prior t-1
gen diff  = sigma_sign_nojkt*shock_nojkt_trim - l.res_fe_qty
label var diff "shock_nojkt_trim - prior in t-1"
gen diff_trim  = sigma_sign_nojkt_trim*shock_nojkt_trim - l.res_fe_qty
label var diff_trim "shock_nojkt_trim - prior in t-1, sigma trimmed"
*
sort country prod year
save "$Output\dataset_brv_fe_reconstr", replace

/*Get total exports (for market shares controls)*/
use "$Output\export_brv_reconstr", clear
collapse (sum) value_tot = value quantity_tot=quantity, by(country prod year)
sort country prod year
merge 1:m country prod year using "$Output\dataset_brv_fe_reconstr"
drop _merge

sort ijk year
g quantity_tot_l1 = l1.quantity_tot
g value_tot_l1    = l1.value_tot

*label var count_ijk       			"number of obs, ijk"
label var shock_nojkt     			"a/sigma(t-1), no jkt in p"
label var shock_nojkt_nosign    	"a/sigma(t-1), excl. insign. beta, no jkt in p"
label var shock_nojkt_trim			"a/sigma(t-1), no jkt in prices, clean"
label var sigma_sign_nojkt			"Sigma, no jkt in p, sign."
label var sigma_sign_nojkt_trim		"Sigma, sign., no jkt in p, trimmed"
label var res_fe_qty      			"Residuals quantities, FE ikt jkt"
label var res_fe_uv_nojkt 			"Residuals prices residuals, FE ikt"
label var value						"Export value"
label var quantity        			"Export quantity"
label var dprior          			"$\Delta\varepsilon^q_{ijkt}$"
label var distsea_new     			"Time to ship"
label var value_tot       			"Total export value, jkt"
label var quantity_tot    			"Total export quantity, jkt"
label var value_tot_l1     			"Total export value, jkt, t-1"
label var quantity_tot_l1  			"Total export quantity, jkt, t-1"
*note: all following labels are the same (for tables) - but variables are different, see computations above
label var diff            			"$\widehat{a}_{ijkt}-\varepsilon^q_{ijk,t-1}$"

*Compute interactions for tables

foreach def in ele1{
	gen diff_`def'      			= diff*age_`def'
	gen age_`def'10 				= age_`def'
	replace age_`def'10				= 9 if age_`def'>8
	tab age_`def'10, gen(`def'_)
	forvalues x = 1(1)9 {
		g diff_`def'_`x' 	= diff * `def'_`x' 
		}
	label var  age_`def'		"Age$ _{ijkt}$" 
	label var diff_`def'		"\hspace{1cm} $\times$ Age$ _{ijkt}$"
	label var diff_`def'_1		"\hspace{1cm} $\times$ Age$ _{ijkt}=1$" 
	label var diff_`def'_2		"\hspace{1cm} $\times$ Age$ _{ijkt}=2$" 
	label var diff_`def'_3		"\hspace{1cm} $\times$ Age$ _{ijkt}=3$" 
	label var diff_`def'_4		"\hspace{1cm} $\times$ Age$ _{ijkt}=4$"
	label var diff_`def'_5		"\hspace{1cm} $\times$ Age$ _{ijkt}=5$" 
	label var diff_`def'_6		"\hspace{1cm} $\times$ Age$ _{ijkt}=6$"
	label var diff_`def'_7		"\hspace{1cm} $\times$ Age$ _{ijkt}=7$"
	label var diff_`def'_8		"\hspace{1cm} $\times$ Age$ _{ijkt}=8$" 
	label var diff_`def'_9		"\hspace{1cm} $\times$ Age$ _{ijkt}=9$" 
	*label var diff_`def'_10		"\hspace{1cm} $\times$ Age$ _{ijkt}=10$" 

}

egen i = group(siren)
sort ijk year	
save "$Output\dataset_brv_fe_reconstr", replace
*
erase "$Output\temp_res_fe.dta"
erase "$Output\use_pred3_hs6.dta"
*

******************************
* E - Test prediction 1 *
******************************
*
log using "$results\prediction1_reconstr_fe.log", replace

use $Output\dataset_brv_fe_reconstr, clear
global condition  "entry_ele!=1994 & entry_ele!=1995"

foreach def in ele1 {

	foreach var in diff {

		eststo: reg dprior `var' age_`def'				if $condition, r cluster(i)
		eststo: reg dprior `var' age_`def' `var'_`def'  if $condition, r cluster(i)
		eststo: reg dprior `var' age_`def' `var'_`def'  if $condition, vce(bootstrap) 
		eststo: reg dprior `var'_`def'_* `def'_* 	    if $condition, r cluster(i)
		*eststo: reg dprior `var'_`def'_* `def'_* 	    if $condition, vce(bootstrap) 
		drop `var'_`def'_9	`def'_9	
		eststo: reg dprior `var' `var'_`def'_* `def'_* 	   if $condition, r cluster(i)
		*eststo: reg dprior `var' `var'_`def'_* `def'_* 	   if $condition, vce(bootstrap) 
		
		set linesize 250
		esttab, mtitles drop(_cons) b(%5.3f) se(%5.3f) compress r2 starlevels(c 0.1 b 0.05 a 0.01)  se 
		esttab, mtitles drop(_cons) b(%5.3f) se(%5.3f) r2  starlevels({$^c$} 0.1 {$^b$} 0.05 {$^a$} 0.01) se tex label  title() 
		eststo clear
	}
}



***************************************************************************
* F - Level :  qty and UV for main definition of experience, no jkt in prices *
***************************************************************************

foreach res in "res_fe" {
	foreach age in "age_ele1" {
	*
	use $Output\dataset_brv_fe_reconstr, clear
	tab `age', gen(aged)
	replace aged10 = 1 if `age'>=10
	drop aged11
	tab year, gen(yeard)
	*
	gen size_l1	= log(quantity_l1/quantity_tot_l1)
	*
	*label var `res'      "Demand shock"
	*
	global condition     "entry_ele!=1994 & entry_ele!=1995 "
	*
	sort ijk year
	*
 	label var `age' 	"Age$ _{ijkt}$" 
	label var aged1		"Age$ _{ijkt}=1$" 
	label var aged2		"Age$ _{ijkt}=2$" 
	label var aged3		"Age$ _{ijkt}=3$" 
	label var aged4		"Age$ _{ijkt}=4$" 
	label var aged5		"Age$ _{ijkt}=5$" 
	label var aged6		"Age$ _{ijkt}=6$" 
	label var aged7		"Age$ _{ijkt}=7$" 
	label var aged8		"Age$ _{ijkt}=8$" 
	label var aged9		"Age$ _{ijkt}=9$" 
	*label var aged10	"Age$ _{ijkt}=10$" 
	*
	eststo: reg `res'_qty   		`age'       	if $condition, ro cluster(i) 
	eststo: reg `res'_qty    		aged2-aged9     if $condition, ro cluster(i) 
	test aged3 = aged4
	test aged3 = aged5
	test aged3 = aged6
	test aged3 = aged7
	test aged4 = aged5
	test aged4 = aged6
	test aged4 = aged7
	test aged5 = aged6
	test aged5 = aged7
	test aged6 = aged7

	eststo: areg `res'_qty    	aged2-aged9     if $condition , a(ijk) cluster(i)
	eststo: reg `res'_uv_nojkt  `age'        	if $condition  & e(sample), ro cluster(i)
	eststo: reg `res'_uv_nojkt 	aged2-aged9   	if $condition  & e(sample), ro cluster(i)
	eststo: areg `res'_uv_nojkt aged2-aged9     if $condition  & e(sample), a(ijk) cluster(i)
	/* including size (t-1) as control */
	eststo: reg `res'_uv_nojkt aged3-aged9 size_l1	    if $condition & e(sample), cluster(i)
	eststo: areg `res'_uv_nojkt aged3-aged9 size_l1	if $condition & e(sample), a(ijk) cluster(i)
	set linesize 250
	esttab, mtitles drop(_cons) b(%5.3f) se(%5.3f) compress r2 starlevels(c 0.1 b 0.05 a 0.01)  se 
	esttab, mtitles drop(_cons) b(%5.3f) se(%5.3f) r2  starlevels({$^c$} 0.1 {$^b$} 0.05 {$^a$} 0.01) se tex label  title() 
	eststo clear
	}
}


log close

